/******************************************************************************
Safer in School? The Impact of Compulsory Schooling on Maltreatment and Associated Harms
by Adam A. Dzulkipli, Nicole Black, David W. Johnston, and Leonie Segal

Description: This program recreates all of the Appendix figures presented in the
supplemental Online Appendix available for the paper. The figures are exported as
individual PDF files, and can then be combined together manually using a
typesetting software such as LaTeX or Microsoft Word.

Note that directory paths have been removed to preserve privacy. Please insert 
the relevant directories in the global macros under "Setup" before running the
.do file. This code was written and implemented in Stata MP 19.5, with
a computational time of approximately ~1.5 hours.

Packages required include:
•	ftools
•	reghdfe
•	estout
•	rdrobust
•	pzms
•	rddisttestk (.ado file by Brigham Frandsen available from
	https://sites.google.com/view/brighamfrandsen/software?authuser=0)
•	rddensity
•	rdhonest

*****************************************************************************/

********************************************************************************
*	Setup
********************************************************************************
clear all
set more off
macro drop all
capture log close

graph set window fontface "Times New Roman"
set scheme s2color

global raw 
global tmp 
global clean 
global output 

********************************************************************************
*	Appendix Figure B1: Bandwidth selection via cross-validation, Anker et al. (2021)
********************************************************************************

use "${clean}analysis_final.dta", clear	

/*
  Estimate cross-validation (CV) function, looping over the following outcomes:
  1. Enrolment at age 16
  2. Enrolment at age 17
  3. Any CPS notification after age 16
  4. Any ED visit at age 16
  5. Any ED visit between ages 17 to 21
*/	
* Define range of bandwidths under consideration (between 50 and 150 weeks)
local low 50
local high 150

* Generate counts for the corresponding row numbers (to store variables)
gen bandwidth = _n if inrange(_n, `low', `high')
	
* Set choice of window length for the CV procedure
local w 4 // 4 weeks or ~1 month
	
* Run CV procedure on schooling outcomes
quietly{
foreach var in enrol16 enrol17{

	gen `var'_1 = 0

	forval bw = `low'/`high'{

	gen sqe = .

	forval x = 1/`w'{
		* Estimate regression model
		reg `var' nocps_post1 anycps_post1 cps_age16_binary h3 c.h3#post1 ///
			c.h3#cps_age16_binary c.h3#post1#cps_age16_binary i.monthofbirth#cps_age16_binary ///
			if inrange(h3, -`bw' - `x', `bw' + `x') & !inrange(h3, -`x', `x') ///
			& rec_census15 == 1

		* Calculate prediction error in the `n' excluded months
		predict yhat_`var'
		replace sqe = (`var' - yhat_`var')^2 if (h3 == -`x' | h3 == `x') & rec_census15 == 1
		
		drop yhat_`var'
	}
	
	* Calculate mean-squared error (MSE) associated with the choice of bandwidth
	egen sse = sum(sqe)
	count if inrange(h3, -`w', `w') & h3 != 0 & rec_census15 == 1
	local N = r(N)
	replace `var'_1 = `var'_1 + (sse/(`N')) if bandwidth == `bw'
	
	drop sqe sse
}
}
}

* Run CV procedure on CPS outcome(s)
quietly{
foreach var in cps_after16_binary{

	gen `var'_1 = 0

	forval bw = `low'/`high'{

	gen sqe = .

	forval x = 1/`w'{
		* Estimate regression model
		reg `var' nocps_post1 anycps_post1 cps_age16_binary h3 c.h3#post1 ///
			c.h3#cps_age16_binary c.h3#post1#cps_age16_binary i.monthofbirth#cps_age16_binary ///
			if inrange(h3, -`bw' - `x', `bw' + `x') & !inrange(h3, -`x', `x')
		
		* Calculate prediction error in the `n' excluded months
		predict yhat_`var'
		replace sqe = (`var' - yhat_`var')^2 if (h3 == -`x' | h3 == `x')
		
		drop yhat_`var'
	}
	* Calculate mean-squared error (MSE) associated with the choice of bandwidth
	egen sse = sum(sqe)
	count if inrange(h3, -`w', `w') & h3 != 0
	local N = r(N)
	replace `var'_1 = `var'_1 + (sse/(`N')) if bandwidth == `bw'
	
	drop sqe sse
}
}
}

* Run CV procedure on ED outcomes
quietly{
foreach var in evered16 evered_17to21{

	gen `var'_1 = 0

	forval bw = `low'/`high'{

	gen sqe = .

	forval x = 1/`w'{
		* Estimate regression model
		reg `var' nocps_post1 anycps_post1 cps_age16_binary h3 c.h3#post1 ///
			c.h3#cps_age16_binary c.h3#post1#cps_age16_binary i.monthofbirth#cps_age16_binary ///
			if inrange(h3, -`bw' - `x', `bw' + `x') & !inrange(h3, -`x', `x')
			
		* Calculate prediction error in the `n' excluded months
		predict yhat_`var'
		replace sqe = (`var' - yhat_`var')^2 if (h3 == -`x' | h3 == `x')
		
		drop yhat_`var'
		}
		
	* Calculate mean-squared error (MSE) associated with the choice of bandwidth	
	egen sse = sum(sqe)
	count if inrange(h3, -`w', `w') & h3 != 0
	local N = r(N)
	replace `var'_1 = `var'_1 + (sse/(`N')) if bandwidth == `bw'
	
	drop sqe sse
	}
}
}

* Calculate the value of the joint CV criterion for each bandwidth
preserve
gen joint_cv = 0

local varlist enrol16 enrol17 cps_after16_binary evered16 evered_17to21

foreach var in `varlist'{
	replace joint_cv = joint_cv + `var'_1 
}

replace joint_cv = . if !inrange(bandwidth, 50, 150)

* Identify bandwidth which minimises the joint CV criterion
egen min_cv = min(joint_cv)
tab bandwidth if joint_cv == min_cv

* Plot joint CV criterion across the range of bandwdiths
twoway (scatter joint_cv bandwidth if inrange(bandwidth, 50, 150) & joint_cv != min_cv, msize(small)) ///
	(scatter joint_cv bandwidth if joint_cv == min_cv, mcolor(maroon) msymbol(Dh) ///
	msize(small)), ytitle("Cross-Validation Criterion", size(5)) ///
	 xtitle("Bandwidth (Weeks)", size(4.5)) ///
	graphregion(color(white)) xlab(50(50)150, labsize(3.5)) ///
	ylab(, nogrid labsize(3.5)) ///
	legend(off)
	
* Export graph as PDF
graph export "${output}Appendix_FigureB1.pdf", replace

********************************************************************************
*	Appendix Figure B2: Density test
********************************************************************************

use "${clean}analysis_final.dta", clear	

/*
  Use 'rddensity' package in Stata to construct density plot and perform density
  test of Cattaneo et al. (2020)
*/
rddensity h2, h(721) p(1) q(1) kernel(uniform) ///
	plot plot_range(-721 721) hist_width(1) hist_range(-721 721) ///
	histl_opt(recast(scatter) mcolor(forest_green) msize(vsmall)) histr_opt(recast(scatter) mcolor(forest_green) msize(vsmall)) ///
	esll_opt(color(black)) eslr_opt(color(black)) ///
	cirr_opt(color(gs12) fcolor(%20)) cirl_opt(color(gs12) fcolor(%20)) ///
	cilr_opt(color(gs12) fcolor(%20)) cill_opt(color(gs12) fcolor(%20)) ///
	graph_opt(legend(off) graphregion(color(white)) ///
	ylab(0(0.0002)0.0006, nogrid) ///
	xlab(-720(240)720) xscale(range(-780 780)) ///
	xline(-0.5, lwidth(thin) lcolor(black) lpattern(dash)) ///
	ytitle("Density", size(4.5) color(black)) ///
	xtitle("Days born before/after January 1993", size(4.5) color(black))) 
	
graph export "${output}Appendix_FigureB2.pdf", replace

********************************************************************************
*	Appendix Figure B3: Covariate balance across reform cut-off (graphical evidence)
********************************************************************************

use "${clean}analysis_final.dta", clear	

/*
  Calculate local averages of pre-treatment covariates by day-of-birth relative
  to the reform cut-off (i.e. 1 January 1993), grouped together in monthly bins
*/
global averages "sex adv_cong babygest adv_b_gest babywght adv_wgt mother_age adv_m_age adv_m_marstat adv_ses cps_age16_binary"

foreach var in $averages{
	bysort h1: egen `var'_mean = mean(`var')
	
	bysort h1 (h2): replace `var'_mean = . if _n != 1
}

/*
  Now we construct RD plots across the cut-off, using the covariates as outcome variables:
  1. Sex
  2. Birthweight (in grams)
  3. Low birthweight, < 2500g (binary)
  4. Gestational age (in weeks)
  5. Gestational age (binary)
  6. Any pediatric condition
  7. Mother's marital status
  8. Mother's age (in years)
  9. Mother's age < 21 years old (binary)
  10. Socioeconomic status (bottom quintile of Index of Relative Socioeconomic
	  Disadvantage)
  11. Any CPS involvement up to age 16
*/
* Define macros and graph settings
global bw1 "h2 >= -721 & h2 <= 721"
global bwr1 "h2 >= 0 & h2 <= 721"
global bwl1 "h2 < 0 & h2 >= -721"
global xtitle "Days born before/after January 1993"

global xlabel "xlab(-720(240)720, labsize(4.5)) xscale(range(-780 780))"
global graph_opt "xline(0, lwidth(thin) lpattern(dash) lcolor(black)) graphregion(color(white))"

* RD plot for any CPS involvement up to age 16
global yvar "cps_age16_binary"
global ylabel "ylab(0(0.1)0.4, nogrid labsize(4.5))"

twoway (scatter ${yvar}_mean h2 if $bw1, mcolor(forest_green)) ///
	(lfitci ${yvar} h2 if $bwl1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfitci ${yvar} h2 if $bwr1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfit ${yvar} h2 if $bwl1, lcolor(black)) ///
	(lfit ${yvar} h2 if $bwr1, lcolor(black)), ///
	$ylabel ///
	$xlabel xtitle($xtitle, size(5.5) color(black)) ///
	$graph_opt legend(off)
	
graph export "${output}Appendix_FigureB3_a.pdf", replace

* RD plot for proportion of females
global yvar "sex"
global ylabel "ylab(0.3(0.1)0.7, nogrid labsize(4.5))"

twoway (scatter ${yvar}_mean h2 if $bw1, mcolor(forest_green)) ///
	(lfitci ${yvar} h2 if $bwl1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfitci ${yvar} h2 if $bwr1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfit ${yvar} h2 if $bwl1, lcolor(black)) ///
	(lfit ${yvar} h2 if $bwr1, lcolor(black)), ///
	$ylabel ///
	$xlabel xtitle($xtitle, size(5.5) color(black)) ///
	$graph_opt legend(off)
	
graph export "${output}Appendix_FigureB3_b.pdf", replace

* RD plot for birthweight (in grams)
global yvar "babywght"
global ylabel "ylab(3200(100)3500, nogrid labsize(4.5))"

twoway (scatter ${yvar}_mean h2 if $bw1, mcolor(forest_green)) ///
	(lfitci ${yvar} h2 if $bwl1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfitci ${yvar} h2 if $bwr1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfit ${yvar} h2 if $bwl1, lcolor(black)) ///
	(lfit ${yvar} h2 if $bwr1, lcolor(black)), ///
	$ylabel ///
	$xlabel xtitle($xtitle, size(5.5) color(black)) ///
	$graph_opt legend(off)
	
graph export "${output}Appendix_FigureB3_c.pdf", replace

* RD plot for low birthweight (< 2500g)
global yvar "adv_wgt"
global ylabel "ylab(0(0.05)0.15, nogrid labsize(4.5))"

twoway (scatter ${yvar}_mean h2 if $bw1, mcolor(forest_green)) ///
	(lfitci ${yvar} h2 if $bwl1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfitci ${yvar} h2 if $bwr1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfit ${yvar} h2 if $bwl1, lcolor(black)) ///
	(lfit ${yvar} h2 if $bwr1, lcolor(black)), ///
	$ylabel ///
	$xlabel xtitle($xtitle, size(5.5) color(black)) ///
	$graph_opt legend(off)
	
graph export "${output}Appendix_FigureB3_d.pdf", replace

* RD plot for gestational age (in weeks)
global yvar "babygest"
global ylabel "ylab(38(1)40, nogrid labsize(4.5))"

twoway (scatter ${yvar}_mean h2 if $bw1, mcolor(forest_green)) ///
	(lfitci ${yvar} h2 if $bwl1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfitci ${yvar} h2 if $bwr1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfit ${yvar} h2 if $bwl1, lcolor(black)) ///
	(lfit ${yvar} h2 if $bwr1, lcolor(black)), ///
	$ylabel ///
	$xlabel xtitle($xtitle, size(5.5) color(black)) ///
	$graph_opt legend(off)
	
graph export "${output}Appendix_FigureB3_e.pdf", replace

* RD plot for low gestational age (< 37 weeks)
global yvar "adv_b_gest"
global ylabel "ylab(0(0.05)0.15, nogrid labsize(4.5))"

twoway (scatter ${yvar}_mean h2 if $bw1, mcolor(forest_green)) ///
	(lfitci ${yvar} h2 if $bwl1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfitci ${yvar} h2 if $bwr1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfit ${yvar} h2 if $bwl1, lcolor(black)) ///
	(lfit ${yvar} h2 if $bwr1, lcolor(black)), ///
	$ylabel ///
	$xlabel xtitle($xtitle, size(5.5) color(black)) ///
	$graph_opt legend(off)
	
graph export "${output}Appendix_FigureB3_f.pdf", replace

* RD plot for any pediatric conditions
global yvar "adv_cong"
global ylabel "ylab(0(0.05)0.1, nogrid labsize(4.5))"

twoway (scatter ${yvar}_mean h2 if $bw1, mcolor(forest_green)) ///
	(lfitci ${yvar} h2 if $bwl1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfitci ${yvar} h2 if $bwr1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfit ${yvar} h2 if $bwl1, lcolor(black)) ///
	(lfit ${yvar} h2 if $bwr1, lcolor(black)), ///
	$ylabel ///
	$xlabel xtitle($xtitle, size(5.5) color(black)) ///
	$graph_opt legend(off)
	
graph export "${output}Appendix_FigureB3_g.pdf", replace

* RD plot for mother's age at birth (in years)
global yvar "mother_age"
global ylabel "ylab(26(1)29, nogrid labsize(4.5))"

twoway (scatter ${yvar}_mean h2 if $bw1, mcolor(forest_green)) ///
	(lfitci ${yvar} h2 if $bwl1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfitci ${yvar} h2 if $bwr1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfit ${yvar} h2 if $bwl1, lcolor(black)) ///
	(lfit ${yvar} h2 if $bwr1, lcolor(black)), ///
	$ylabel ///
	$xlabel xtitle($xtitle, size(5.5) color(black)) ///
	$graph_opt legend(off)
	
graph export "${output}Appendix_FigureB3_h.pdf", replace

* RD plot for mother's age < 21 years at birth
global yvar "adv_m_age"
global ylabel "ylab(0(0.05)0.15, nogrid labsize(4.5))"

twoway (scatter ${yvar}_mean h2 if $bw1, mcolor(forest_green)) ///
	(lfitci ${yvar} h2 if $bwl1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfitci ${yvar} h2 if $bwr1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfit ${yvar} h2 if $bwl1, lcolor(black)) ///
	(lfit ${yvar} h2 if $bwr1, lcolor(black)), ///
	$ylabel ///
	$xlabel xtitle($xtitle, size(5.5) color(black)) ///
	$graph_opt legend(off)

graph export "${output}Appendix_FigureB3_i.pdf", replace

* RD plot for mother's marital status at birth (not married or de facto)
global yvar "adv_m_marstat"
global ylabel "ylab(0(0.1)0.3, nogrid labsize(4.5))"

twoway (scatter ${yvar}_mean h2 if $bw1, mcolor(forest_green)) ///
	(lfitci ${yvar} h2 if $bwl1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfitci ${yvar} h2 if $bwr1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfit ${yvar} h2 if $bwl1, lcolor(black)) ///
	(lfit ${yvar} h2 if $bwr1, lcolor(black)), ///
	$ylabel ///
	$xlabel xtitle($xtitle, size(5.5) color(black)) ///
	$graph_opt legend(off)
	
graph export "${output}Appendix_FigureB3_j.pdf", replace

* RD plot for low socioeconomic status (bottom quintile of IRSD)
global yvar "adv_ses"
global ylabel "ylab(0.1(0.1)0.5, nogrid labsize(4.5))"

twoway (scatter ${yvar}_mean h2 if $bw1, mcolor(forest_green)) ///
	(lfitci ${yvar} h2 if $bwl1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfitci ${yvar} h2 if $bwr1, ciplot(rarea) fcolor(%20) lwidth(vthin)) ///
	(lfit ${yvar} h2 if $bwl1, lcolor(black)) ///
	(lfit ${yvar} h2 if $bwr1, lcolor(black)), ///
	$ylabel ///
	$xlabel xtitle($xtitle, size(5.5) color(black)) ///
	$graph_opt legend(off)
	
graph export "${output}Appendix_FigureB3_k.pdf", replace

********************************************************************************
*	Appendix Figure B4: Robustness to alternative bandwidths (school enrolment)
********************************************************************************

use "${clean}analysis_final.dta", clear	

* Restrict to public school sample (i.e. children for whom we have schooling data)
keep if rec_census15 == 1

* Define range of bandwidths for estimating RD treatment effects
local low 520
local high 920

* Generate counts for the corresponding row numbers (to store RD estimates)
gen bandwidth = _n if inrange(_n, `low', `high')
	
* Estimate RD treatment effects, store estimates, and generate bandwidth plots
local j (
local k b

foreach y in enrol16 enrol17{	
	eststo clear

	gen beta_hat_nocp = .
	gen ci_ll_nocp = .
	gen ci_ul_nocp = .
	gen beta_hat_anycp = .
	gen ci_ll_anycp = .
	gen ci_ul_anycp = .

	quietly{
		forval x = `low'(1)`high'{	
			
		* Use 'reghdfe' package to generate RD estimates for each bandwidth
		/*
		reghdfe `y' nocps_post1 anycps_post1 cps_age16_binary h2 c.h2#post1 ///
			c.h2#cps_age16_binary c.h2#post1#cps_age16_binary ///
			if abs(h2) <= `x', vce(robust) absorb(monthofbirth#cps_age16_binary)
			
		replace beta_hat_nocp = _b[nocps_post1] if bandwidth == `x'
		replace ci_ll_nocp = _b[nocps_post1] - invttail(e(df_r), 0.025)*_se[nocps_post1] if bandwidth == `x'
		replace ci_ul_nocp = _b[nocps_post1] + invttail(e(df_r), 0.025)*_se[nocps_post1] if bandwidth == `x'

		replace beta_hat_anycp = _b[anycps_post1] if bandwidth == `x'
		replace ci_ll_anycp = _b[anycps_post1] - invttail(e(df_r), 0.025)*_se[anycps_post1] if bandwidth == `x'
		replace ci_ul_anycp = _b[anycps_post1] + invttail(e(df_r), 0.025)*_se[anycps_post1] if bandwidth == `x'
		*/
		
		* Alternatively, use 'rdrobust' package (estimates are numerically equivalent)
		capture rdrobust `y' h2 if cps_age16_binary == 0, p(1) kernel(uni) covs(mb*) h(`x')

		replace beta_hat_anycp = e(tau_cl) if bandwidth == `x'
		replace ci_ll_anycp = e(ci_l_cl) if bandwidth == `x'
		replace ci_ul_anycp = e(ci_r_cl) if bandwidth == `x'
		
		capture rdrobust `y' h2 if cps_age16_binary == 1, p(1) kernel(uni) covs(mb*) h(`x')
		
		replace beta_hat_nocp = e(tau_cl) if bandwidth == `x'
		replace ci_ll_nocp = e(ci_l_cl) if bandwidth == `x'
		replace ci_ul_nocp = e(ci_r_cl) if bandwidth == `x'

			}
	}
	
	/*
	  Plot RD estimates for the non-CPS group across the range of bandwidths, and
	  highlight the optimal bandwidths selected via CV (721 days), KS (672 days),
	  and CCT (650 days)
	*/
	twoway (rline ci_ll_nocp ci_ul_nocp bandwidth, lcolor(navy8) lpattern(dash)) ///
		(line beta_hat_nocp bandwidth, lcolor(navy8)) ///
		(rspike ci_ll_nocp ci_ul_nocp bandwidth if bandwidth == 721, lcolor(maroon)) ///
		(scatter beta_hat_nocp bandwidth if bandwidth == 721, mcolor(maroon)) ///
		(rspike ci_ll_nocp ci_ul_nocp bandwidth if bandwidth == 672, lcolor(dkorange)) ///
		(scatter beta_hat_nocp bandwidth if bandwidth == 672, mcolor(dkorange) msymbol(triangle)) ///
		(rspike ci_ll_nocp ci_ul_nocp bandwidth if bandwidth == 650, lcolor(dkgreen)) ///
		(scatter beta_hat_nocp bandwidth if bandwidth == 650, mcolor(dkgreen) msymbol(square)), ///
		graphregion(color(white)) legend(off) ///
		xlab(520(100)920, labsize(4.5)) ylab(-0.1(0.05)0.1, nogrid labsize(4.5)) ///
		yline(0, lcolor(black) lpattern(dash)) ///
		xtitle("Bandwidth (Days)", size(5) color(black)) ///
		ytitle("Effect Size", size(5) color(black)) 
	
	* Export graph as PDF
	local j = char(ceil(strpos("`c(alpha)'", "`j'")/2) + 97)
	graph export "${output}Appendix_FigureB4_`j'.pdf", replace
	
	/*
	  Plot RD estimates for the CPS group across the range of bandwidths, and
	  highlight the optimal bandwidths selected via CV (721 days), KS (672 days),
	  and CCT (650 days)
	*/
	twoway (rline ci_ll_anycp ci_ul_anycp bandwidth, lcolor(navy8) lpattern(dash)) ///
		(line beta_hat_anycp bandwidth, lcolor(navy8)) ///
		(rspike ci_ll_anycp ci_ul_anycp bandwidth if bandwidth == 721, lcolor(maroon)) ///
		(scatter beta_hat_anycp bandwidth if bandwidth == 721, mcolor(maroon)) ///
		(rspike ci_ll_anycp ci_ul_anycp bandwidth if bandwidth == 672, lcolor(dkorange)) ///
		(scatter beta_hat_anycp bandwidth if bandwidth == 672, mcolor(dkorange) msymbol(triangle)) ///
		(rspike ci_ll_anycp ci_ul_anycp bandwidth if bandwidth == 650, lcolor(dkgreen)) ///
		(scatter beta_hat_anycp bandwidth if bandwidth == 650, mcolor(dkgreen) msymbol(square)), ///
		graphregion(color(white)) legend(off) ///
		xlab(520(100)920, labsize(4.5)) ylab(-0.1(0.05)0.1, nogrid labsize(4.5)) ///
		yline(0, lcolor(black) lpattern(dash)) ///
		xtitle("Bandwidth (Days)", size(5) color(black)) ///
		ytitle("Effect Size", size(5) color(black)) 
	
	* Export graph as PDF
	local k = char(ceil(strpos("`c(alpha)'", "`k'")/2) + 97)
	graph export "${output}Appendix_FigureB4_`k'.pdf", replace
	
	drop beta_hat_nocp ci_ll_nocp ci_ul_nocp beta_hat_anycp ci_ll_anycp ci_ul_anycp
}

********************************************************************************
*	Appendix Figure B5: Robustness to alternative bandwidths (new CPS notifications)
********************************************************************************

use "${clean}analysis_final.dta", clear	

* Define range of bandwidths to estimate RD treatment effects
local low 520
local high 920

* Generate counts for the corresponding row numbers (to store the RD estimates)
gen bandwidth = _n if inrange(_n, `low', `high')
	
* Estimate RD treatment effects, store estimates, and generate bandwidth plots
local j (

foreach y in cps_after16_binary{
	eststo clear

	gen beta_hat_nocp = .
	gen ci_ll_nocp = .
	gen ci_ul_nocp = .
	gen beta_hat_anycp = .
	gen ci_ll_anycp = .
	gen ci_ul_anycp = .

	quietly{
		forval x = `low'(1)`high'{
		
		* Use 'reghdfe' package to generate RD estimates for each bandwidth
		/*
		reghdfe `y' nocps_post1 anycps_post1 cps_age16_binary h2 c.h2#post1 ///
			c.h2#cps_age16_binary c.h2#post1#cps_age16_binary ///
			if abs(h2) <= `x', vce(robust) absorb(monthofbirth#cps_age16_binary)
			
		replace beta_hat_nocp = _b[nocps_post1] if bandwidth == `x'
		replace ci_ll_nocp = _b[nocps_post1] - invttail(e(df_r), 0.025)*_se[nocps_post1] if bandwidth == `x'
		replace ci_ul_nocp = _b[nocps_post1] + invttail(e(df_r), 0.025)*_se[nocps_post1] if bandwidth == `x'

		replace beta_hat_anycp = _b[anycps_post1] if bandwidth == `x'
		replace ci_ll_anycp = _b[anycps_post1] - invttail(e(df_r), 0.025)*_se[anycps_post1] if bandwidth == `x'
		replace ci_ul_anycp = _b[anycps_post1] + invttail(e(df_r), 0.025)*_se[anycps_post1] if bandwidth == `x'
		*/
		
		* Alternatively, use 'rdrobust' package (estimates are numerically equivalent)
		capture rdrobust `y' h2 if cps_age16_binary == 0, p(1) kernel(uni) covs(mb*) h(`x')
		
		replace beta_hat_nocp = e(tau_cl) if bandwidth == `x'
		replace ci_ll_nocp = e(ci_l_cl) if bandwidth == `x'
		replace ci_ul_nocp = e(ci_r_cl) if bandwidth == `x'

		capture rdrobust `y' h2 if cps_age16_binary == 1, p(1) kernel(uni) covs(mb*) h(`x')

		replace beta_hat_anycp = e(tau_cl) if bandwidth == `x'
		replace ci_ll_anycp = e(ci_l_cl) if bandwidth == `x'
		replace ci_ul_anycp = e(ci_r_cl) if bandwidth == `x'

		}
	}
	
	/*
	  Plot RD estimates for the non-CPS group across the range of bandwidths, and
	  highlight the optimal bandwidths selected via CV (721 days), KS (672 days),
	  and CCT (650 days)
	*/
	twoway (rline ci_ll_nocp ci_ul_nocp bandwidth, lcolor(navy8) lpattern(dash)) ///
		(line beta_hat_nocp bandwidth, lcolor(navy8)) ///
		(rspike ci_ll_nocp ci_ul_nocp bandwidth if bandwidth == 721, lcolor(maroon)) ///
		(scatter beta_hat_nocp bandwidth if bandwidth == 721, mcolor(maroon)) ///
		(rspike ci_ll_nocp ci_ul_nocp bandwidth if bandwidth == 672, lcolor(dkorange)) ///
		(scatter beta_hat_nocp bandwidth if bandwidth == 672, mcolor(dkorange) msymbol(triangle)) ///
		(rspike ci_ll_nocp ci_ul_nocp bandwidth if bandwidth == 650, lcolor(dkgreen)) ///
		(scatter beta_hat_nocp bandwidth if bandwidth == 650, mcolor(dkgreen) msymbol(square)), ///
		graphregion(color(white)) legend(off) ///
		xlab(520(100)920, labsize(4.5)) ylab(-0.02(0.01)0.02, nogrid labsize(4.5)) ///
		yline(0, lcolor(black) lpattern(dash)) ///
		xtitle("Bandwidth (Days)", size(5) color(black)) ///
		ytitle("Effect Size", size(5) color(black)) 
	
	* Export graph as PDF
	local j = char(ceil(strpos("`c(alpha)'", "`j'")/2) + 97)
	graph export "${output}Appendix_FigureB5_`j'.pdf", replace
	
	/*
	  Plot RD estimates for the CPS group across the range of bandwidths, and
	  highlight the optimal bandwidths selected via CV (721 days), KS (672 days),
	  and CCT (650 days)
	*/
	twoway (rline ci_ll_anycp ci_ul_anycp bandwidth, lcolor(navy8) lpattern(dash)) ///
		(line beta_hat_anycp bandwidth, lcolor(navy8)) ///
		(rspike ci_ll_anycp ci_ul_anycp bandwidth if bandwidth == 721, lcolor(maroon)) ///
		(scatter beta_hat_anycp bandwidth if bandwidth == 721, mcolor(maroon)) ///
		(rspike ci_ll_anycp ci_ul_anycp bandwidth if bandwidth == 672, lcolor(dkorange)) ///
		(scatter beta_hat_anycp bandwidth if bandwidth == 672, mcolor(dkorange) msymbol(triangle)) ///
		(rspike ci_ll_anycp ci_ul_anycp bandwidth if bandwidth == 650, lcolor(dkgreen)) ///
		(scatter beta_hat_anycp bandwidth if bandwidth == 650, mcolor(dkgreen) msymbol(square)), ///
		graphregion(color(white)) legend(off) ///
		xlab(520(100)920, labsize(4.5)) ylab(-0.1(0.05)0.1, nogrid labsize(4.5)) ///
		yline(0, lcolor(black) lpattern(dash)) ///
		xtitle("Bandwidth (Days)", size(5) color(black)) ///
		ytitle("Effect Size", size(5) color(black)) 
	
	* Export graph as PDF
	local j = char(ceil(strpos("`c(alpha)'", "`j'")/2) + 97)
	graph export "${output}Appendix_FigureB5_`j'.pdf", replace

	drop beta_hat_nocp ci_ll_nocp ci_ul_nocp beta_hat_anycp ci_ll_anycp ci_ul_anycp
}
	

********************************************************************************
*	Appendix Figure B6: Robustness to alternative bandwidths (ED visits)
********************************************************************************

use "${clean}analysis_final.dta", clear	

* Define range of bandwidths for estimating RD treatment effects
local low 520
local high 920

* Generate counts for the corresponding row numbers (to store RD estimates)
gen bandwidth = _n if inrange(_n, `low', `high')

* Estimate RD treatment effects, store estimates, and generate bandwidth plots	
local j (
local k b

foreach y in evered16 evered_17to21{
	eststo clear

	gen beta_hat_nocp = .
	gen ci_ll_nocp = .
	gen ci_ul_nocp = .
	gen beta_hat_anycp = .
	gen ci_ll_anycp = .
	gen ci_ul_anycp = .

	quietly{
		forval x = `low'(1)`high'{
		* Use 'reghdfe' package to generate RD estimates for each bandwidth
		/*
		reghdfe `y' nocps_post1 anycps_post1 cps_age16_binary h2 c.h2#post1 ///
			c.h2#cps_age16_binary c.h2#post1#cps_age16_binary ///
			if abs(h2) <= `x', vce(robust) absorb(monthofbirth#cps_age16_binary)
			
		replace beta_hat_nocp = _b[nocps_post1] if bandwidth == `x'
		replace ci_ll_nocp = _b[nocps_post1] - invttail(e(df_r), 0.025)*_se[nocps_post1] if bandwidth == `x'
		replace ci_ul_nocp = _b[nocps_post1] + invttail(e(df_r), 0.025)*_se[nocps_post1] if bandwidth == `x'

		replace beta_hat_anycp = _b[anycps_post1] if bandwidth == `x'
		replace ci_ll_anycp = _b[anycps_post1] - invttail(e(df_r), 0.025)*_se[anycps_post1] if bandwidth == `x'
		replace ci_ul_anycp = _b[anycps_post1] + invttail(e(df_r), 0.025)*_se[anycps_post1] if bandwidth == `x'
		*/
		
		* Alternatively, use 'rdrobust' package (estimates are numerically equivalent)
		capture rdrobust `y' h2 if cps_age16_binary == 0, p(1) kernel(uni) covs(mb*) h(`x')
		
		replace beta_hat_nocp = e(tau_cl) if bandwidth == `x'
		replace ci_ll_nocp = e(ci_l_cl) if bandwidth == `x'
		replace ci_ul_nocp = e(ci_r_cl) if bandwidth == `x'

		capture rdrobust `y' h2 if cps_age16_binary == 1, p(1) kernel(uni) covs(mb*) h(`x')

		replace beta_hat_anycp = e(tau_cl) if bandwidth == `x'
		replace ci_ll_anycp = e(ci_l_cl) if bandwidth == `x'
		replace ci_ul_anycp = e(ci_r_cl) if bandwidth == `x'

		}
	
	}
	
	/*
	  Plot RD estimates for the non-CPS group across the range of bandwidths, and
	  highlight the optimal bandwidths selected via CV (721 days), KS (672 days),
	  and CCT (650 days)
	*/
	twoway (rline ci_ll_nocp ci_ul_nocp bandwidth, lcolor(navy8) lpattern(dash)) ///
		(line beta_hat_nocp bandwidth, lcolor(navy8)) ///
		(rspike ci_ll_nocp ci_ul_nocp bandwidth if bandwidth == 721, lcolor(maroon)) ///
		(scatter beta_hat_nocp bandwidth if bandwidth == 721, mcolor(maroon)) ///
		(rspike ci_ll_nocp ci_ul_nocp bandwidth if bandwidth == 672, lcolor(dkorange)) ///
		(scatter beta_hat_nocp bandwidth if bandwidth == 672, mcolor(dkorange) msymbol(triangle)) ///
		(rspike ci_ll_nocp ci_ul_nocp bandwidth if bandwidth == 650, lcolor(dkgreen)) ///
		(scatter beta_hat_nocp bandwidth if bandwidth == 650, mcolor(dkgreen) msymbol(square)), ///
		graphregion(color(white)) legend(off) ///
		xlab(520(100)920, labsize(4.5)) ylab(-0.04(0.02)0.04, nogrid labsize(4.5)) ///
		yline(0, lcolor(black) lpattern(dash)) ///
		xtitle("Bandwidth (Days)", size(5) color(black)) ///
		ytitle("Effect Size", size(5) color(black)) 
	
	* Export graph as PDF
	local j = char(ceil(strpos("`c(alpha)'", "`j'")/2) + 97)
	graph export "${output}Appendix_FigureB6_`j'.pdf", replace
	
	/*
	  Plot RD estimates for the CPS group across the range of bandwidths, and
	  highlight the optimal bandwidths selected via CV (721 days), KS (672 days),
	  and CCT (650 days)
	*/
	twoway (rline ci_ll_anycp ci_ul_anycp bandwidth, lcolor(navy8) lpattern(dash)) ///
		(line beta_hat_anycp bandwidth, lcolor(navy8)) ///
		(rspike ci_ll_anycp ci_ul_anycp bandwidth if bandwidth == 721, lcolor(maroon)) ///
		(scatter beta_hat_anycp bandwidth if bandwidth == 721, mcolor(maroon)) ///
		(rspike ci_ll_anycp ci_ul_anycp bandwidth if bandwidth == 672, lcolor(dkorange)) ///
		(scatter beta_hat_anycp bandwidth if bandwidth == 672, mcolor(dkorange) msymbol(triangle)) ///
		(rspike ci_ll_anycp ci_ul_anycp bandwidth if bandwidth == 650, lcolor(dkgreen)) ///
		(scatter beta_hat_anycp bandwidth if bandwidth == 650, mcolor(dkgreen) msymbol(square)), ///
		graphregion(color(white)) legend(off) ///
		xlab(520(100)920, labsize(4.5)) ylab(-0.1(0.05)0.1, nogrid labsize(4.5)) ///
		yline(0, lcolor(black) lpattern(dash)) ///
		xtitle("Bandwidth (Days)", size(5) color(black)) ///
		ytitle("Effect Size", size(5) color(black)) 
	
	* Export graph as PDF
	local k = char(ceil(strpos("`c(alpha)'", "`k'")/2) + 97)
	graph export "${output}Appendix_FigureB6_`k'.pdf", replace

	drop beta_hat_nocp ci_ll_nocp ci_ul_nocp beta_hat_anycp ci_ll_anycp ci_ul_anycp

}

/********************************************************************************
References:

Anker, Anne Sofie Tegner, Jennifer L. Doleac, and Rasmus Landersø, "The
	Effects of DNA Databases on the Deterrence and Detection of Offenders," American
	Economic Journal: Applied Economics, 2021, 13 (4), 194–225.
Calonico, Sebastian, Matias D. Cattaneo, and Rocio Titiunik, "Robust Nonparametric
	Confidence Intervals for Regression-Discontinuity Designs," Econometrica,
	2014, 82 (6), 2295–2326.
Cattaneo, Matias D., Michael Jansson, and Xinwei Ma, "Simple Local Polynomial
	Density Estimators," Journal of the American Statistical Association, 2020, 115 (531),
	1449–1455.
Kettlewell, Nathan and Peter Siminski, "Optimal Model Selection in RDD and Related
	Settings Using Placebo Zones," Working Paper, University of Technology Sydney
	2022.
********************************************************************************/